Task 1. Open science perspectives

600 - 800 word statement

Task 2. Truckee River flow (2000 - 2016)

a) Graph with the decomposed time series information
# Get data
truckee <- read_csv("clean_truckee_flow.csv")

# Convert to ts data
truckee_ts <- ts(truckee$mean_va, frequency = 12, start = c(2000,1))
# plot(truckee_ts)

# Decompose to explore data further
truckee_dc <- decompose(truckee_ts)
plot(truckee_dc)

Although there are some high values and low values in the data, there do not seem to be any outliers (which could have a disproportionate effect on the time series model). There seems to be a slight downward trend in the data and it looks non-stationary and additive. We see a seasonal pattern that repeats about every 12 months and a slight cyclical trend about every five years (the seasonal component is on the same scale as the orinigal data).

b) Forecast the Truckee River for 5 years after the final observation in the dataset
# Holt Winters exponential smoothing
truckee_hw <- HoltWinters(truckee_ts)
# plot(truckee_hw)

# Forecast Holt Winters
truckee_forecast <- forecast(truckee_hw, h = 60)
# plot(truckee_forecast)

# Autoregressive integrated moving average (ARIMA) for comparison
# estimate pdq

truckee_pdq <- auto.arima(truckee_ts) # [2,1,1,][0,0,2]

# fit the ARIMA model
truckee_arima <- arima(truckee_ts, order = c(2,1,1), seasonal = list(order = c(0,0,2)))

# evaluate residuals
# par(mfrow = c(1,2))
# hist(truckee_arima$residuals)
# qqnorm(truckee_arima$residuals) # looks normal

# forecast ARIMA
forecast_truckee <- forecast(truckee_arima, h = 60)
# plot(forecast_truckee) 

# Graph of Holt Winters 
plot(truckee_forecast,
     xlab = "Time",
     ylab = "Truckee River Flows (cubic feet per second)")

c) Holt Winters residuals
par(mfrow = c(1,2))
hist(truckee_forecast$residuals) 
qqnorm(truckee_forecast$residuals) # Looks relatively normally distributed.

Task 3. Mapping California’s National Parks

# Read in nps data
nps_ca <- read_sf(dsn = ".", layer = "nps_boundary") %>%
  filter(STATE == "CA",
         UNIT_TYPE == "National Park") %>% 
  dplyr::select(UNIT_NAME) %>% 
  rename(Name = UNIT_NAME)

st_crs(nps_ca) = 4326

# Read in CA county data
ca_counties <- read_sf(dsn = ".", layer = "california_county_shape_file")

st_crs(ca_counties) = 4326

# Map it!
map_nps_ca <- tm_shape(nps_ca) +
  tm_fill("Name", palette = "Dark2", alpha = 0.5) +
  tm_shape(ca_counties) +
  tm_basemap("Esri.WorldPhysical") +
  tm_legend(show = FALSE) +
  tm_borders()

tmap_mode("view")

map_nps_ca

Task 4. Lizards in the Northern Chihuahuan Desert

# Read in data and initial wrangling
lizard <- read_csv("clean_lizard.csv") %>% 
  filter(site == "CALI") %>% 
  replace_with_na_all(condition = ~.x == ".") %>% 
  filter(sex == "F" | sex == "M") %>% 
  drop_na(weight)

# Coerce weights to be numeric
lizard$weight <- as.numeric(lizard$weight)
a) For all lizards trapped at site ‘CALI’, do weights of male and female adult lizards differ significantly?
# Visualize data
# ggplot(lizard, aes(x = weight, fill = sex)) +
  # geom_histogram(alpha = 0.5, position = "identity")

# Both male and female weights are skewed. However, the central limit theorem applies here - distribution of means will be normally distributed (we have more than 30 observations in each group)

# Vector of adult male lizard weights
male <- lizard %>% 
  filter(sex == "M") %>% 
  pull(weight)

# Vector of adult female lizard weights
female <- lizard %>% 
  filter(sex == "F") %>% 
  pull(weight)

# F test for equal variance
# H0: Ratio of variances (Variance A/Variance B) = 1
# HA: Ratio of variances is NOT = 1

lizard_f <- var.test(female, male)
# p-value = 0.29, retain the null hypothesis of equal variance

# T sample t-test
# H0: Mean weights of adult female and male lizards are equal
# HA: Mean weights of adult female and male lizards are NOT equal

lizard_t <- t.test(female, male, var.equal = TRUE)
# p-value = 0.43, retain the null hypothesis. For lizards trapped at the CALI site, mean weights of female and male adult lizards do not differ significantly.

# Mean weights and Cohen's d - effect size
male_lizard <- lizard %>%
  dplyr::select(sex, weight) %>% 
  filter(sex == "M") 

female_lizard <- lizard %>% 
  dplyr::select(sex, weight) %>% 
  filter(sex == "F") 

male_mean <- mean(male_lizard$weight) # 4.96
male_sd <- sd(male_lizard$weight) # 5.68
female_mean <- mean(female_lizard$weight) # 6.50
female_sd <- sd(female_lizard$weight) #5.83

# Mean weight difference
mean_diff <- female_mean - male_mean # 0.86

effect_size <- cohen.d(weight ~ sex, data = lizard) 
# 0.14 neglibile

Mean adult female lizard weights (6.50 g ± 5.83 g, n = 75) and adult male lizard weights (4.96 g ± 5.68 g, n = 57) for lizards trapped at the Caliche creosotebush site did not differ significantly [t(130) = 0.8, p = 0.427, \(\alpha\) = 0.05]. The effect size is negligible (Cohen’s d = 0.14) and the difference in mean weight between female and male lizards is 0.86 grams.

b) For lizards trapped at the ‘CALI’ site, is there a significant difference in the proportion of adult male and female lizards with broken tails?
lizard_tails <- lizard %>% 
  dplyr::select(sex, tail) %>% 
  drop_na(tail)

# Chi-square
# H0: There is no significant difference between proportion of females and males with broken tails
# HA: There is a significant difference between proportion of females and males with broken tails

chi_tail <- lizard_tails %>%
  count(sex, tail) %>% 
  spread(tail, n) %>% 
  dplyr::select(-sex)

rownames(chi_tail) <- c("F","M")

#Actual proportions:
tail_prop <- prop.table(as.matrix(chi_tail), 1)

tail_x2 <- chisq.test(chi_tail) # Retain the null, there is no significant difference in proportions of females and males with broken tails at the CALI site x2 = 0.163, df = 1, p-value = 0.69

There is no significant difference in the proportion of observed adult female (0.23) and male lizards (0.18) with broken tails at the Caliche creosotebush site (\(\chi^2\){1} = 0.16, p = 0.69).